# Download the Ontario school COVID-19 case file and parse it with pinned
# column types, so the result does not depend on readr's per-download type
# guessing. Supplying `col_types` also quiets the column specification message.
data_url <- "https://data.ontario.ca/dataset/b1fef838-8784-4338-8ef9-ae7cfd405b41/resource/dc5c8788-792f-4f91-a400-036cdf28cfe8/download/schoolrecentcovid2021_2022.csv"
school_data <- read_csv(
  data_url,
  col_types = cols(
    # Identifiers and names stay text; a guessed numeric type would strip
    # any leading zeros from school_id.
    school_board = col_character(),
    school_id = col_character(),
    school = col_character(),
    municipality = col_character(),
    collected_date = col_date(),
    reported_date = col_date(),
    confirmed_student_cases = col_double(),
    confirmed_staff_cases = col_double(),
    total_confirmed_cases = col_double()
    # Columns not listed here are still guessed (the `cols()` default).
  )
)
## Rows: 50299 Columns: 10
## ── Column specification ────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): school_board, school_id, school, municipality
## dbl (4): confirmed_student_cases, confirmed_staff_cases, confirmed_unspecif...
## date (2): collected_date, reported_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One row per reported_date holding the sum of total_confirmed_cases across
# all rows for that date; missing counts are ignored in the sum.
daily_cases <- summarise(
  group_by(school_data, reported_date),
  total_cases = sum(total_confirmed_cases, na.rm = TRUE)
)
# Running total of the per-date case sums.
# cumsum() accumulates in row order, so sort by date explicitly rather than
# relying on whatever order daily_cases happens to arrive in.
cumulative_cases <- daily_cases %>%
  arrange(reported_date) %>%
  mutate(cumulative = cumsum(total_cases))
# The ten school boards with the largest summed total_confirmed_cases,
# ordered from highest to lowest. Intermediate steps live inside local() so
# they do not leak into the surrounding environment.
top_boards <- local({
  board_totals <- summarise(
    group_by(school_data, school_board),
    total_cases = sum(total_confirmed_cases, na.rm = TRUE)
  )
  ranked_boards <- arrange(board_totals, desc(total_cases))
  slice_head(ranked_boards, n = 10)
})
# Line chart of the per-date case sums.
ggplot(
  data = daily_cases,
  mapping = aes(x = reported_date, y = total_cases)
) +
  geom_line(colour = "#2980b9") +
  theme_minimal() +
  labs(
    title = "Daily Reported Cases",
    x = "Date",
    y = "Total Cases"
  )
### Cumulative Trends {.row}
# Line chart of the running case total against reported date.
ggplot(data = cumulative_cases) +
  aes(x = reported_date, y = cumulative) +
  geom_line(colour = "#27ae60") +
  theme_minimal() +
  labs(
    title = "Cumulative Cases Over Time",
    x = "Date",
    y = "Cumulative Cases"
  )
### Top Boards {.row}
# Horizontal bar chart of the top boards; reorder() sorts the bars by their
# case totals and coord_flip() turns the columns sideways.
ggplot(
  data = top_boards,
  mapping = aes(
    x = reorder(school_board, total_cases),
    y = total_cases
  )
) +
  geom_col(fill = "#9b59b6") +
  labs(
    title = "Top 10 School Boards by Total Cases",
    x = "School Board",
    y = "Cases"
  ) +
  theme_minimal() +
  coord_flip()